/*
Replicate tables and appendix material from:
Title: Last-Mile Delivery Increases Vaccine Uptake in Sierra Leone
Authors: Niccolò F. Meriggi, Maarten Voors, Madison Levine, Vasudha Ramakrishna, Desmond Maada Kangbai ///
Michael Rozelle, Ella Tyler,. Sellu Kallon, Junisa Nabieu, Sarah Cundy and Ahmed Mushfiq Mobarak
Journal: Nature

NOTE: 
Running this file requires installation of several user written commands. These
commands are set to be installed by this dofile if you do not already have them.

Unzip "data.zip"

The dofile is set to export all materials to either the "tables" or 
"graphs" folder, which it will create inside your specified directory.
	
	This do file uses the following de-identified datasets, with corresponding code books :
	
		lit_review.dta
		individual_level.dta
		community_data.dta

	and following outputs for figures in the main text, extended data and supplementary information: 
	
		FIGURE 2: vacrate_pooled.png
		FIGURE 3: vaccount_pooled.png
		FIGURE 4: knowledge_attitudes.png
		FIGURE 5: respondent_characteristics.png
		FIGURE 6: literature_effects.png
		FIGURE 7: literature_cost.png
		SI FIGURE A2: comm_vax_rate_dist.png
		SI FIGURE A3: comm_vax_count_dist.png
		SI FIGURE A4: vac_team.png

	and generates the following tables in the main text, extended data and supplementary information:

		TABLE 1: itt_over_treatments.tex
		TABLE 2: census_bal.tex
		TABLE 3: itt_rate.tex
		TABLE 4: itt_count.tex
		TABLE 5: att_willing.tex
		TABLE 6: itt_knowledge.tex
		TABLE 7: itt_trust.tex
		TABLE 8: itt_demographic.tex
		TABLE 10: sample_diff.xlsx
		
		SI TABLE Table A1:
		effect_size_table_1.tex
		effect_size_table_2.tex
		effect_size_table_3.tex
		effect_size_table_4.tex
		effect_size_table_5.tex
		effect_size_table_6.tex
		effect_size_table_7.tex
		effect_size_table_8.tex
		effect_size_table_9.tex
		effect_size_table_10.tex
		effect_size_table_11.tex
		effect_size_table_12.tex
		effect_size_table_13.tex
		effect_size_table_14.tex
		effect_size_table_15.tex
		effect_size_table_16.tex
	
	
last modified 9 November 2023
*/

*===============================================================================
* 	Setup
*===============================================================================

clear all
version 17
set more off
set maxvar 30000		
cap log close
cap set trace off
set tracedepth 5
set linesize 80
frames reset
set graphics on

cap set scheme white_w3d
graph set window fontface Arial

*===============================================================================
* 	Macros
*===============================================================================

// fill in your working directory here
global directory "" 

if "$directory" == "" {
	
	display as error "You have not filled in your working directory in the dofile"
	break
	
}

cd 				"${directory}"
global data 	"${directory}/data"
global graphs	"${directory}/graphs"
global tables	"${directory}/tables"

foreach folder in "${graphs}" "${tables}" {
	
	cap mkdir "`folder'"
	
}

global covariates vaccinated_baseline_18 perc_emp_ag radio_ownership ///
		breast own_land

*===============================================================================
* 	Dependencies
*===============================================================================

local dependencies splitvallabels estout stripplot unique coefplot texsave ///
		renvars ereplace

foreach dependency in `dependencies' {
	
	which `dependency'
	if _rc == 111 {
		
		ssc install `dependency'
		
	}
	
}

*===============================================================================
* 	In-Text Calculations
*===============================================================================

/*
	"
	The average number of people vaccinated from within each community increases 
	from about 9 people pre-intervention to 55 people within the intervention 
	period of about 2-3 days.
	"
*/

use "${data}/individual_level.dta", clear

* subset to treatment villages
keep if any_treat == 1

* collapse to village level
collapse (sum) vaccinated_baseline vaccinated_endline, ///
	by(community_code)
	
sum vaccinated_baseline, detail
sum vaccinated_endline, detail 

/* 
"
at a cost of US$33 per person vaccinated
"
*/

use "${data}/individual_level.dta", clear

* divide total costs over # vaccinated
sum vaccinated_team
di 156023.5/`r(sum)'

/* 
	"
	To benchmark our results against other vaccination strategies, we conduct a 
	comprehensive systematic review that identifies 234 unique interventions 
	in 144 RCT studies that use information, nudges, community engagement, 
	social signaling, non-financial and financial incentives to increase 
	vaccination rates across many settings around the world.
	"
*/ 

use "${data}/lit_review.dta", clear
unique study if author != "THIS STUDY"

/*
	"
	Over a third of these interventions produce null effects 
	"
	
	"
	35.04% of all treatments reviewed had an insignificant effect on vaccine uptake
	"
	
*/

gen is_null = effectsize == 0
noisily tab is_null if author != "THIS STUDY"

/*
	"
	Our access intervention produces a larger percentage point effect size than 
	between 223 (95%) of the treatments reviewed.
	"
*/

count if effectsize < 26.1 & !missing(effectsize) & author != "THIS STUDY"
local count = `r(N)'
count if !missing(effectsize) & author != "THIS STUDY"
local rate : display %7.0f (100*`count')/`r(N)'
noisily display "`count' studies have an effect size of < 26.1" _n ///
		"This constitutes `rate'% of all treatments assessed"
		
		
/* "
	To calculate a village level vaccination rate, we had to first 
	enumerate the  population in all 150 treatment and control villages. Such 
	community census lists typically do not exist in Sierra Leone. Our research 
	team therefore walked to all structures in every village to tally the 
	number of households (39 on average, SD = 23), and the number of 
	individuals living in those households (29,588 individuals across the 
	150 villages, or about 197 people per village).
	"
*/

use "${data}/individual_level.dta", clear

keep if in_census == 1

// collapse to community level
collapse (max) number_of_people = villsize ///
	number_of_households = hhs_in_community ///
	, by(community_code)

sum number_of_people
local mn : display %7.0f `r(mean)'
local ttl : display %7.0fc `r(sum)'
noisily display as result "Mean number of people per community: `mn'"
noisily display as result "Total number of people enumerated: `ttl'"

sum number_of_households
local mn : display %7.0fc `r(mean)'
local stde : display %7.0f `r(sd)'
noisily display as result "Mean number of households per community: `mn'"
noisily display as result "Standard deviation of number of households per community: `stde'"

/*
	"
	The population of these villages was on average 22.3 years old, 
	26.5% of households were female-headed, 64.5% of people lived in a 
	household of 6 or fewer people. Only 20.1% lived in a household where 
	the household head had any form of formal schooling, and about 86.1% 
	lived in a household where the head was primarily engaged in farming.
	"
*/

use "${data}/individual_level.dta", clear

keep if in_census == 1

noisily sum age
noisily tab hh_gender
noisily tab hh_size
noisily tab anyschooling
noisily tab farmer

/*
	"
	...at baseline the average vaccination rate in control villages was 6.2%, 
	compared to 9.5% in treatment villages (difference 0.03, p = 0.015). 
	Post intervention, the vaccination rate increased to 30.2% in treatment 
	villages.
	"
*/

*subset to adults
keep if above18 == 1

noisily tab vaccinated_baseline if any_treat == 0
noisily tab vaccinated_baseline if any_treat == 1
noisily ttest vaccinated_baseline, by(any_treat)
areg vaccinated_baseline any_treat, absorb(grpID) cluster(community_code)
noisily tab vaccinated_endline if any_treat == 1

/*
	"
	In 2 out of the 100 treatment villages there was zero increase in 
	vaccinations because the mobile team did not receive permission from 
	village authorities
	"
*/

use "${data}/individual_level.dta", clear
bysort community_code: egen intervention_attempted = max(vaccinated_team)
egen comtag = tag(community_code)
noisily tab intervention_attempted if any_treat == 1 & comtag

/*
	"
	in five villages, over 50% of adults enumerated in the community census 
	were vaccinated during the course of our intervention
	"
*/

*subset to treatment
keep if any_treat == 1 & above18 == 1 & in_census == 1

collapse (sum) vaccinated_team (count) adults_in_census = any_treat, by(community_code)
gen pct_vaccinated = (vaccinated_team * 100)/adults_in_census
count if pct_vaccinated >= 50

/*
	"
	At baseline there were on average about five people vaccinated in control villages, and about 8 people in treatment 
	villages (difference 3.45, p < 0.056)
	"
*/

use "${data}/individual_level.dta", clear

keep if age >= 12 & !missing(age)

collapse (sum) vaccinated_baseline (max) any_treat, by(community_code)

noisily bysort any_treat: sum vaccinated_baseline 
noisily ttest vaccinated_baseline, by(any_treat)

/*
	"
	Amongst individuals vaccinated who were not enumerated in the census, 
	53% (12-13 people per treatment community) were visitors who came in 
	from nearby villages to get vaccinated, whilst the remaining 47% 
	(11-12 people) included short-term, circular commuters or migrant returnees 
	who were not present on the day of the census and could not be matched to 
	our listing records, as well as individuals whose "community of origin" 
	could not be determined.
	"
*/

use "${data}/individual_level.dta", clear

keep if any_treat == 1 & vaccinated_team == 1

gen vax_provenance = .
replace vax_provenance = 1 if in_census == 1
replace vax_provenance = 2 if in_census == 0 & diff_comm == 1
replace vax_provenance = 3 if in_census == 0 & diff_comm != 1
label define provenance 1 "From community census" ///
	2 "Different community" ///
	3 "Other"
label values vax_provenance provenance

assert !missing(vax_provenance)
tab vax_provenance if vax_provenance != 1

/*
	"
	In total the teams vaccinated 4,771 people aged 12 or 
	above. Of these 39% received a Johnson & Johnson vaccine, 29% Pfizer, 
	17% Sinofarm and 16% received AstraZeneca
	"
*/

use "${data}/individual_level.dta", clear

* of the 4771 people, 4765 consented to the interview
tab vaccine_type

/*
	"
	41% of households participated in these meetings. 44% of those who chose 
	to attend the meeting subsequently chose to get vaccinated.
	"
*/

use "${data}/individual_level.dta", clear
keep if any_treat == 1 & in_census == 1

preserve
collapse (max) attended, by(hh_id)
noisily tab attended
restore

noisily bysort attended: tab vaccinated_team

/*
	"
	Of the 234 different treatments identified in our systematic literature 
	review, only 33 (14.10%) directly stated the cost of the intervention per 
	successfully administered vaccination.
	"
*/

use "${data}/lit_review.dta", clear

gen cost_stated = !missing(cost_effect) | costeffectiveness == "same as other arms of study"
tab cost_stated if author != "THIS STUDY"
unique study if costeffectiveness == "same as other arms of study"

/*
	"
	The mean value in Figure 7 is US$102 (SD = 162), even 
	after top-coding the most expensive approaches.
	"
*/

replace cost_effect = 600 if study == 96 & order == 229
sum cost_effect if author != "THIS STUDY"

/*
	"
	Due to logistical complexities and costs, in some communities mobilizers 
	did not include very remote village structures (more than 15 minutes walk 
	from the village center. This excluded a total of 10 structures (including 
	40 people aged 12 and above).
	"
*/

use "${data}/individual_level.dta", clear
unique structure_id if periphery == 1 & dtd != 1 & age >= 12 & !missing(age)

*===============================================================================
* 	Figures
*===============================================================================

*------------------------------------------------------------------
*	Figure 2: Vaccination Rate Amongst Adults Enumerated During 
*	Census Before and After Mobile Vaccination Program
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

keep if above18 == 1 & in_census == 1

/// collapse to summary stats at treatment status level
collapse (mean) mean_baseline = vaccinated_baseline ///
		mean_endline = vaccinated_endline ///
		(semean) se_baseline = vaccinated_baseline ///
		se_endline = vaccinated_endline ///
		(count) n_baseline = vaccinated_baseline ///
		n_endline = vaccinated_endline ///
		, by(any_treat)

/// generate plottable variables
foreach survey in endline baseline {
	
	gen se_upper_`survey' = mean_`survey' + ///
		se_`survey'
		
	gen se_lower_`survey' = mean_`survey' - ///
		se_`survey'
		
	gen label_`survey' = round(mean_`survey', 0.0001)
	format label_`survey' %9.3f
	
	gen mean_`survey'_grapos = mean_`survey' + 0.01
	
}


graph twoway /// overlay several plots
	/// baseline vaccination rates
	(bar mean_endline any_treat if any_treat == 1, ///
	barwidth(0.7) bargap(10) color("maroon") lcolor(black)) ///  
	/// endline vaccination rate (treatment only)
	(bar mean_baseline any_treat, ///
	barwidth(0.7) bargap(10) color("gs14") lcolor(black)) /// 
	/// add label to endline vaccination rate (treatment only)
	(scatter mean_endline_grapos any_treat if any_treat == 1, ///
	msym(none) mlab(label_endline) mlabsize(medium) ///
	mlabposition(12) mlabcolor(black)) ///
	/// add sem interval to baseline vaccination rate
	(rcap se_upper_baseline se_lower_baseline any_treat, ///
	lcolor(black)) ///
	/// add sem interval to endline vaccination rate (treatment only)
	(rcap se_upper_endline se_lower_endline any_treat, lcolor(black)) ///
	/// add label to baseline vaccination rate
	(scatter mean_baseline_grapos any_treat, ///
	msym(none) mlab(label_baseline) mlabsize(medium) mlabposition(12) mlabcolor(black)), ///
	/// general plot options
	xtitle("") legend(off) xlabel(1 "Pooled Treatment" 0 "Control", ///
	noticks labsize(medium) nogrid) ///
	ytitle("Proportion Vaccinated", size(large)) ylabel(0(0.05)0.40, nogrid) ///
	yscale(range(0 0.40)) xscale(range(-0.25 1.25)) ///
	plotregion(fcolor(white) lcolor(black)) ///
	ylabel(0(0.05)0.4, format(%7.2fc))
graph export "${graphs}/vacrate_pooled.svg", replace as(svg)
graph export "${graphs}/vacrate_pooled.png", replace as(png)

*------------------------------------------------------------------
*	Figure 3: Count of People Vaccinated per Site Before and After 
*	Mobile Vaccination Program
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

tempfile census_exit_original
save `census_exit_original'

local conditions ///
	"" /// Anyone at all
	"keep if in_census == 1" /// Anyone recorded in census
	"keep if diff_comm == 1 | in_census == 1" /// Including those outside the treatment village

// get estimates
local i = 1
foreach condition in "`conditions'" {
	
	// use original census-exit survey data
	use `census_exit_original', clear
	
	`condition'
	
	// collapse to community level
	collapse (sum) vaccinated_endline_`i' = vaccinated_endline ///
	vaccinated_baseline_`i' = vaccinated_baseline ///
	(count) baseline_count_`i' = vaccinated_baseline ///
	endline_count_`i' = vaccinated_endline ///
	(max) any_treat treatment grpID, ///
	by(community_code)
	
	tempfile `i'
	save ``i''
	
	local ++i
	
}
	
use `1', clear
merge 1:1 community_code using `2', nogen assert(3)
merge 1:1 community_code using `3', nogen assert(3)

local collapse_cmd
local i = 1
foreach condition in any census outside {
	
	local collapse_cmd `collapse_cmd' ///
		(mean) vaccinated_endline_`condition' = vaccinated_endline_`i' ///
		(semean) se_endline_`condition' = vaccinated_endline_`i'
		
	local ++i
	
}

collapse (mean) vaccinated_baseline_any = vaccinated_baseline_1 ///
		(semean) se_baseline_any = vaccinated_baseline_1 ///
		`collapse_cmd', by(any_treat)
		
foreach condition in any census outside {
	
	gen se_upper_`condition' = vaccinated_endline_`condition' + ///
		(se_endline_`condition')
		
	gen se_lower_`condition' = vaccinated_endline_`condition' - ///
		(se_endline_`condition')
		
	format vaccinated_endline_`condition' %7.2f
	
}

gen se_upper_baseline = vaccinated_baseline_any + se_baseline_any
gen se_lower_baseline = vaccinated_baseline_any - se_baseline_any
		
local only_treatment_vars vaccinated_endline_any se_endline_any ///
		vaccinated_endline_census se_endline_census ///
		vaccinated_endline_outside se_endline_outside se_upper_any /// 
		se_lower_any se_upper_census se_lower_census se_upper_outside ///
		se_lower_outside 
		
foreach var of varlist `only_treatment_vars' {
	
	replace `var' = . if any_treat == 0
	
}

gen vax_graphpos = any_treat + 0.08

local graph_cmd
foreach condition in any outside census {
	
	local graph_cmd `graph_cmd' ///
		/// graph the bars
		(bar vaccinated_endline_`condition' any_treat if any_treat == 1, ///
		barwidth(0.45) bargap(20) color(maroon) lcolor(black)) ///
		/// and graph the sem  
		(rcap se_upper_`condition' se_lower_`condition' any_treat if any_treat == 1, ///
		lcolor(black)) ///
		/// and add labels
		(scatter vaccinated_endline_`condition' vax_graphpos if any_treat == 1, ///
		msym(none) mlab(vaccinated_endline_`condition') ///
		mlabsize(medsmall) mlabposition(12) mlabcolor(black))
	
	if "`condition'" == "census" {
		
		gen vax_graphpos_baseline = vaccinated_baseline_any + 2
			
		local graph_cmd `graph_cmd' ///
		/// add baseline stats
		(bar vaccinated_baseline_any any_treat, ///
		barwidth(0.45) bargap(20) color(gs14) lcolor(black)) ///
		/// add sem
		(rcap se_upper_baseline se_lower_baseline any_treat, ///
		lcolor(black)) ///
		/// and add labels
		(scatter vaccinated_baseline_any vax_graphpos, ///
		msym(none) mlab(vaccinated_baseline_any) ///
		mlabsize(medsmall) mlabposition(12) mlabcolor(black))
		
	}
	
}

graph twoway `graph_cmd', ///
	/// general plot options
	xtitle("") legend(off) xlabel(1 "Pooled Treatment" 0 "Control", ///
	noticks labsize(medium) nogrid) ///
	ytitle("Number Vaccinated", size(large)) ylabel(0(10)60) ///
	yscale(range(0 60)) xscale(range(0 1)) ///
	plotregion(fcolor(white) lcolor(black)) ///
	text(20.71 0.31 "Present During Census", place(e) just(right) size(medsmall)) ///
	text(38 0.37 "From Other Villages", place(e) just(right) size(medsmall)) ///
	text(4.72 0.59 "Baseline", place(e) just(right) size(medsmall)) ///
	text(49.38 0.63 "Other", place(e) just(right) size(medsmall)) ///
	ylabel(0(10)60, format(%7.0f))

graph export "${graphs}/vaccount_pooled.svg", replace as(svg)
graph export "${graphs}/vaccount_pooled.png", replace as(png)

*------------------------------------------------------------------
*	Figure 4: Effect of Pooled Treatment on Knowledge and Attitudes
*	Among Adults Enumerated During Census
*------------------------------------------------------------------

local covid_vars END_covid_believe END_covid_know
local vax_vars END_effect_stragree END_safe_stragree
local trust_vars END_trust_chc END_trust_mohs END_trust_media ///
		END_trust_socmedia END_trust_famfriend

local coefplot_vars `covid_vars' `vax_vars' `trust_vars'
		
use "${data}/individual_level.dta", clear
keep if incomplete_observations == 0
keep `coefplot_vars' any_treat grpID community_code incomplete_observations

local plot_cmd
foreach var of varlist `coefplot_vars' {

	areg `var' any_treat, absorb(grpID) cluster(community_code)
	est store `var'_coef
	
}

foreach set in covid vax trust {
	
	local plot_`set' (
	
	foreach var of varlist ``set'_vars' {
		
		local plot_`set' `plot_`set'' `var'_coef, asequation("`: variable label `var''") \
		
	}
	
	local plot_`set' `plot_`set'' )
	
	local plot_cmd `plot_cmd' `plot_`set''
	
}

coefplot `plot_cmd', keep(any_treat) xline(0) ///
	mlabel(string(@b,"%9.3f") + ///
	cond(@pval<.01, "***", cond(@pval<.05, "**", ///
	cond(@pval<.1, "*", "")))) mlabposition(12) mlabsize(small) legend(off) ///
	swapnames aseq xlabel(-0.1(0.05)0.3, nogrid) grid(none) ///
	plotregion(fcolor(white) lcolor(black))
	
graph export "${graphs}/knowledge_attitudes.svg", replace as(svg)
graph export "${graphs}/knowledge_attitudes.png", replace as(png)
	
*------------------------------------------------------------------
*	Figure 5: Effect of Pooled Treatment by Respondent 
*	Characteristics Among those Enumerated During Census
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

keep if above18 == 1 & in_census == 1

local conditions "" /// 1 
	"if female == 1" /// 2
	"if female == 0" /// 3
	"if Age18_24 == 1" /// 4
	"if Age25_54 == 1" /// 5
	"if Age55 == 1" /// 6
	"if anyschooling == 1" /// 7
	"if anyschooling == 0" /// 8
	"if BSL_owns_land == 1" /// 9
	"if BSL_owns_land == 0" /// 10
	"if BSL_reduced_portions == 1" /// 11
	"if BSL_reduced_portions == 0" /// 12
	
local plot_cmd
local i = 1
foreach condition in "`conditions'" {
	
	reghdfe vaccinated_endline any_treat `condition', ///
		absorb(grpID) cluster(community_code)
	estimates store M`i'
	
	local plot_cmd `plot_cmd' M`i'
	local ++i
	
}

coefplot (M1, asequation("{bf:Full sample}")) ///
	(M2, asequation("Female") \ M3, asequation("Male")) ///
	(M4, asequation("Aged 18-24") \ M5, asequation("Aged 25-54") \ ///
	M6, asequation("Aged 55+")) ///
	(M7, asequation("HH head any schooling") \ M8, asequation("HH head no schooling")) ///
	(M9, asequation("HH owns any land") \ M10, asequation("HH owns no land")) ///
	(M11, asequation("HH reduced portions of food") \ M12, asequation("HH did not reduce portions of food")) ///
	, keep(any_treat) xline(0) ///
	mlabel(string(@b,"%9.3f") + ///
	cond(@pval<.01, "***", cond(@pval<.05, "**", ///
	cond(@pval<.1, "*", "")))) mlabposition(12) legend(off) ///
	mlabsize(small) ///
	xlabel(0(0.05)0.35, nogrid) grid(none) ///
	plotregion(fcolor(white) lcolor(black)) swapnames aseq
	
graph export "${graphs}/respondent_characteristics.svg", replace as(svg)
graph export "${graphs}/respondent_characteristics.png", replace as(png)

*------------------------------------------------------------------
*	Figure 6: Effect Sizes in Previous Vaccine Uptake RCTs
*------------------------------------------------------------------

use "${data}/lit_review.dta", clear

label define classification 5 "Healthcare Improv.", modify

egen median = median(effectsize), by(classification)
egen loq = pctile(effectsize), by(classification) p(25)
egen upq = pctile(effectsize) , by(classification) p(75)
egen min = min(effectsize)
egen n = count(effectsize), by(classification) 
bysort classification (effectsize): gen use=_n
gen shown = "{it:n} = " + string(n) 
gen classification2 = classification + 0.1
gen classification3 = classification - 0.1
		
stripplot effectsize if author != "THIS STUDY", ms(none) over(classification) ///
	box(barw(0.2) bfcolor("maroon") lcolor(black)) ///
	whiskers(lcolor(black) lwidth(med) msize(small)) ///
	vertical ///
	addplot(scatter median loq upq classification2 if use==1, ms(none ..) ///
	mlabel(median loq upq) mlabsize(7pt 7pt 7pt) mlabcolor(black black black) || ///
	scatter min classification if use==1, ms(none) mlabel(shown) mlabcolor(black) ///
	mlabsize(8pt) mlabpos(6)) ///
	xsc(r(. 5.4)) ysc(r(-1 3)) xla(, noticks labsize(medsmall) alternate) ///
	ytitle("Effect Size (%p)", size(medium)) ///
	xtitle("Intervention Type", size(medium)) ///
	plotregion(fcolor(white) lcolor(black)) ///
	bgcolor(white) graphregion(fcolor(white)) ///
	pctile(5) outside(ms(o) mcolor(black) msize(medsmall))
		
graph export "${graphs}/literature_effects.svg", replace as(svg)
graph export "${graphs}/literature_effects.png", replace as(png)
			
*------------------------------------------------------------------
*	Figure 7: Cost Per Person Vaccinated Compared to Other Studies
*------------------------------------------------------------------		

replace cost_effect = 600 if cost_effect > 2000 & !missing(cost_effect)
labmask id, values(study_label)

label define id 244 "{bf}THIS STUDY (2022)", modify

graph hbar cost_effect if !missing(cost_effect), /// 
		over(classification) over(id, sort(cost_effect) ///
		label(labsize(small)))  ///
		ytitle("Cost per vaccine administered in 2000 USD", size(small)) ///
		ylabel(,labsize(small)) asyvars nofill ///
		legend(cols(1) size(small) ring(0) position(1)) ///
		ylabel(0(100)600) ysize(7) ///
		plotregion(fcolor(white) lcolor(black)) ///
		bgcolor(white) graphregion(fcolor(white) margin(medium)) ///
		scheme(s2color) blabel(bar, format(%7.2f)) ///
		yscale(range(. 700))
		
sum cost_effect
		
graph export "${graphs}/literature_cost.svg", replace as(svg) height(5000)
graph export "${graphs}/literature_cost.png", replace as(png) height(5000)
		
*===============================================================================
* 	Tables
*===============================================================================

*------------------------------------------------------------------
*	Table 1: Intent-to-Treat Effect of Door to Door and Small
* 	Group Treatments
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

merge m:1 community_code using "${data}/community_data.dta", assert(3 4 5) ///
		keepusing(${covariates}) nogen replace update

keep if above18 == 1 & in_census == 1

est clear
eststo: reghdfe vaccinated_endline i.treatment, absorb(grpID) cluster(community_code)  

sum vaccinated_endline if treatment == 0
local temp : display %9.3f `r(mean)'
estadd local mic `temp'

estadd local obs `e(N)'

unique structure_id
estadd local nos `r(unique)'

local temp : display %9.3f `e(r2)'
estadd local rsq `temp' 

unique community_code
estadd local nov `r(unique)'

test 1.treatment = 2.treatment
local temp : display %9.3f `r(p)'
estadd local pds `temp'

estadd local cov "No"

eststo: reghdfe vaccinated_endline i.treatment ${covariates} ///
		, absorb(grpID) cluster(community_code)  

sum vaccinated_endline if treatment == 0
local temp : display %9.3f `r(mean)'
estadd local mic `temp'

estadd local obs `e(N)'

unique structure_id
estadd local nos `r(unique)'

local temp : display %9.3f `e(r2)'
estadd local rsq `temp' 

unique community_code
estadd local nov `r(unique)'

test 1.treatment = 2.treatment
local temp : display %9.3f `r(p)'
estadd local pds `temp'

estadd local cov "Yes"

keep if treatment == 1 & periphery == 0
eststo: reghdfe vaccinated_endline dtd_treatment, ///
		cluster(structure_id) absorb(community_code)

estadd local obs `e(N)'

local temp : display %9.3f `e(r2)'
estadd local rsq `temp' 

unique structure_id
estadd local nos `r(unique)'

sum vaccinated_endline if dtd_treatment == 0
local temp : display %9.3f `r(mean)'
estadd local mic `temp'

unique community_code
estadd local nov `r(unique)'

estadd local pds ""
estadd local cov "No"

esttab using "${tables}/itt_over_treatments.tex", ///
		b(3) se(3) ///
		rename(dtd_treatment dtd 1.treatment dtd) ///
		keep(dtd 2.treatment vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("cov Additional Covariates" ///
		"obs Observations" "mic Mean in Control" "nov No. of Villages" ///
		"nos No. of Structures" ///
		"pds \$P(\beta_{Door-to-Door} = \beta_{Small-Group})\$" ///
		"rsq \$R^2\$") sfmt(3 0) ///
		coef(dtd "Door-to-Door" 2.treatment "Small-Group" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") align(cccc) ///
		label booktabs noobs nonotes nomtitle collabels(none) compress replace
		
esttab using "${tables}/itt_over_treatments.rtf", ///
		b(3) se(3) ///
		rename(dtd_treatment dtd 1.treatment dtd) ///
		keep(dtd 2.treatment vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("cov Additional Covariates" ///
		"obs Observations" "mic Mean in Control" "nov No. of Villages" ///
		"nos No. of Structures" ///
		"pds \$P(\beta_{Door-to-Door} = \beta_{Small-Group})\$" ///
		"rsq \$R^2\$") sfmt(3 0) ///
		coef(dtd "Door-to-Door" 2.treatment "Small-Group" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") align(cccc) ///
		label noobs nonotes nomtitle collabels(none) compress replace

*------------------------------------------------------------------
*	Table 2: Baseline Descriptive Statistics and 
*	Statistical Balance
*------------------------------------------------------------------

local census_15_vars perc_immun perc_literacy avg_age health_fivemiles_above ///
		perc_christian perc_muslim perc_born perc_emp_ag perc_acc_int ///
		formal_structure own_land prim_school_fivemiles ///
		source_water_fivemiles radio_ownership cell_ownership formal_roof ///
		all_assets
		
local vc_vars_1 villsize vaccinated_baseline_18 anyschooling ///
		BSL_owns_land BSL_reduced_portions age ///
		hh_gender breast preg

use "${data}/community_data.dta", clear

keep community_code treatment grpID any_treat `census_15_vars' `vc_vars_1'
		
renvars `vc_vars_1', prefix("VC_")
label values treatment treat1
		
local vc_vars VC_villsize VC_vaccinated_baseline_18 VC_anyschooling ///
		VC_BSL_owns_land VC_BSL_reduced_portions VC_age ///
		VC_hh_gender VC_breast VC_preg
		
local balance_vars `census_15_vars' `vc_vars'

label variable VC_villsize "Village population"
label variable VC_vaccinated_baseline_18 "Proportion of adults vaccinated at baseline"
label variable VC_anyschooling "HH head has had any schooling"
label variable VC_BSL_owns_land "Owns land"
label variable VC_BSL_reduced_portions "Reduced portion sizes in last week"
label variable VC_age "Age"
label variable VC_hh_gender "HH head is female"
label variable VC_breast "Is breastfeeding"
label variable VC_preg "Is pregnant"

matrix baseline_table = J(53, 5, .)

est clear

local rownum = -1
foreach var of varlist `balance_vars' {
	
	local label_`var' : variable label `var'
	
	local rownum = `rownum' + 2
	local rownum_se = `rownum' + 1
	
	sum `var' if treatment == 0
	matrix baseline_table[`rownum', 1] = `r(mean)'
	matrix baseline_table[`rownum_se', 1] = `r(sd)'
	
	eststo: areg `var'  i.treatment, absorb(grpID) vce(robust)
	
	matrix baseline_table[`rownum', 2] = _b[1.treatment]
	matrix baseline_table[`rownum_se', 2] = _se[1.treatment]
	
	matrix baseline_table[`rownum', 3] = _b[2.treatment]
	matrix baseline_table[`rownum_se', 3] = _se[2.treatment]
	
	lincom _b[1.treatment] - _b[2.treatment]
		
	matrix baseline_table[`rownum', 4] = `r(estimate)'
	matrix baseline_table[`rownum_se', 4] = `r(se)'
	
	matrix baseline_table[`rownum', 5] = `e(N)'
	
}

/* 
Multinomial f-test as per reference: Özler, Berk, Lia C.H. Fernald, 
Patricia Kariger, Christin McConnell, Michelle Neuman, and Eduardo Fraga (2018).
"Combining pre-school teacher training with parenting education: A 
cluster-randomized controlled trial." Journal of Development Economics 
133, 448–467. 
*/

mlogit treatment `balance_vars', baseoutcome(0)
test[Individual_Mobilization]
matrix baseline_table[53, 2] = `r(p)'
test[Group_Mobilization]
matrix baseline_table[53, 3] = `r(p)'

mlogit treatment `balance_vars', baseoutcome(1)
test[Group_Mobilization]
matrix baseline_table[53, 4] = `r(p)'

clear
gen variable = ""

local rownum = 1
foreach var in `balance_vars' {
	
	insobs 2
	replace variable = "`label_`var''" in `rownum'
	
	local rownum = `rownum' + 2
	
}

svmat baseline_table, names(table)
rename table1 control_mean
rename table2 c_d2d_diff
rename table3 c_smgr_diff
rename table4 d2d_smgr_diff
rename table5 N

replace variable = "\hspace{0.25cm} " + variable
replace variable = "Joint F-test p-value" in 53

tostring control_mean c_d2d_diff c_smgr_diff d2d_smgr_diff, ///
	replace format(%9.3f) force
	
tostring N, replace

foreach var of varlist control_mean c_d2d_diff c_smgr_diff d2d_smgr_diff N {
	
	replace `var' = "" if `var' == "."
	replace `var' = "(" + `var' + ")" if mod(_n, 2) == 0 & !missing(`var')
	
}

label variable variable ""
label variable control_mean "\multicolumn{1}{c}{\shortstack{Control\\Mean\\(SD)}}"
label variable c_d2d_diff "\multicolumn{1}{c}{\shortstack{Control-\\Door\\to Door\\Diff\\(SE)}}"
label variable c_smgr_diff "\multicolumn{1}{c}{\shortstack{Control-\\Small\\Group\\Diff\\(SE)}}"
label variable d2d_smgr_diff "\multicolumn{1}{c}{\shortstack{Door to \\Door-Small\\ Group\\Diff\\(SE)}}"
label variable N "\multicolumn{1}{c}{N}"

insobs 1, before (1)
replace variable = "\textbf{Community Characteristics from 2015 Census}" in 1
insobs 1, before(36)
replace variable = "\textbf{Characteristics from Village Census}" in 36

texsave using "${tables}/census_bal.tex", ///
	title(Baseline Descriptive Statistics and Statistical Balance) ///
	align(lccccc) location(h) ///
	replace ///
	noendash ///
	hlines(35 54) nofix ///
	size(2) varlabels ///
	bold("Joint F-test p-value")
	
label variable variable ""
label variable control_mean "Control Mean (SD)"
label variable c_d2d_diff "Control-Door-to-Door Diff(SE)"
label variable c_smgr_diff "Control-Small-Group Diff (SE)"
label variable d2d_smgr_diff "Door-to-Door-Small-Group Diff (SE)"
label variable N "N"

replace variable = subinstr(variable, "\hspace{0.25cm}", "", .)
replace variable = subinstr(variable, "\textbf{", "", .)
replace variable = subinstr(variable, "}", "", 1) if regexm(variable, "}$")
replace variable = strtrim(variable)
	
export excel using "${tables}/census_bal.xlsx", replace firstrow(varlabels)
	
*------------------------------------------------------------------
*	Table 3: Intent-to-treat Estimates of Vaccination
*	Rate of People Enumerated During Census
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

keep if above18 == 1 & in_census == 1

est clear

eststo reg1: areg vaccinated_endline any_treat ///
		, absorb(grpID) cluster(community_code)
		
boottest any_treat, seed(420) reps(999)
estadd local p_val `: display %7.3f `r(p)''
estadd local covariates "No"

sum vaccinated_endline if treatment==0 
estadd local ctlm `: display %7.3f `r(mean)''
estadd local ctlsd = `: display %7.3f `r(sd)''
estadd local clusters = `e(N_clust)'

merge m:1 community_code using "${data}/community_data.dta", assert(3 4 5) ///
		keepusing(${covariates}) nogen replace update

eststo reg2: areg vaccinated_endline any_treat ${covariates} ///
		, absorb(grpID) cluster(community_code)

boottest any_treat, seed(420) reps(999)
estadd local p_val `: display %7.3f `r(p)''
estadd local covariates "Yes"

sum vaccinated_endline if treatment==0 
estadd local ctlm `: display %7.3f `r(mean)''
estadd local ctlsd = `: display %7.3f `r(sd)''
estadd local clusters = `e(N_clust)'

collapse (mean) vaccinated_baseline vaccinated_endline ///
		(firstnm) any_treat treatment grpID, ///
		by(community_code)

eststo reg3: areg vaccinated_endline any_treat, absorb(grpID)

boottest any_treat, seed(420) reps(999)
estadd local p_val `: display %7.3f `r(p)''
estadd local covariates "No"

sum vaccinated_endline if treatment==0 
estadd local ctlm `: display %7.3f `r(mean)''
estadd local ctlsd = `: display %7.3f `r(sd)''
estadd local clusters = `e(N)'

esttab using "${tables}/itt_rate.tex", ///
		b(3) se(3) ///
		keep(any_treat vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("covariates Additional Covariates" ///
		"p_val Bootstrapped P-Value" "ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f  %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") ///
		label booktabs noobs nonotes nomtitle collabels(none) compress replace
		
esttab using "${tables}/itt_rate.rtf", ///
		b(3) se(3) ///
		keep(any_treat vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("covariates Additional Covariates" ///
		"p_val Bootstrapped P-Value" "ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f  %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") ///
		label noobs nonotes nomtitle collabels(none) compress replace
		
*------------------------------------------------------------------
*	Table 4: Intent-To-Treat estimates of the Count 
*	of People Vaccinated per Site After Mobile Vaccination Program
*------------------------------------------------------------------		

use "${data}/individual_level.dta", clear
		
local conditions ///
	"keep if in_census == 1" /// Anyone recorded in census
	"keep if diff_comm == 1 | in_census == 1" /// Including those outside the treatment village
	"" /// Anyone at all
	
merge m:1 community_code using "${data}/community_data.dta", assert(3 4 5) ///
		keepusing(${covariates}) nogen replace update
		
est clear
foreach condition in "`conditions'" {
	
	preserve 
	
	`condition'
	
	collapse (sum) vaccinated_endline ///
		(firstnm) treatment grpID any_treat ///
		${covariates}, by(community_code)
		
	eststo: areg vaccinated_endline any_treat, absorb(grpID) ///
		cluster(community_code)
	boottest any_treat, seed(420) reps(999)
	estadd local p_val `: display %7.3f `r(p)''
	
	sum vaccinated_endline if treatment==0 
	estadd local ctlm `: display %7.3f `r(mean)''
	estadd local ctlsd = `: display %7.3f `r(sd)''
	estadd local cov "No"
	
	restore
	
}

foreach condition in "`conditions'" {
	
	preserve 
	
	`condition'
	
	collapse (sum) vaccinated_endline ///
		(firstnm) treatment grpID any_treat ///
		${covariates}, by(community_code)
		
	eststo: areg vaccinated_endline any_treat ${covariates} ///
		, absorb(grpID) cluster(community_code)
	boottest any_treat, seed(420) reps(999)
	estadd local p_val `: display %7.3f `r(p)''
	
	sum vaccinated_endline if treatment==0 
	estadd local ctlm `: display %7.3f `r(mean)''
	estadd local ctlsd = `: display %7.3f `r(sd)''
	estadd local cov "Yes"
	
	restore
	
}

esttab using "${tables}/itt_count.tex", ///
		b(3) se(3) ///
		keep(any_treat vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("cov Additional Covariates" "p_val Bootstrapped P-Value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.2f %10.0f %10.2f) ///
		coef(any_treat "Pooled treatment" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") ///
		label booktabs noobs nonotes nomtitle collabels(none) compress replace ///
		width(\textwidth)
		
esttab using "${tables}/itt_count.rtf", ///
		b(3) se(3) ///
		keep(any_treat vaccinated_baseline_18) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("cov Additional Covariates" "p_val Bootstrapped P-Value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.2f %10.0f %10.2f) ///
		coef(any_treat "Pooled treatment" ///
		vaccinated_baseline_18 "Proportion Vaccinated at Baseline") ///
		label noobs nonotes nomtitle collabels(none) compress replace
		
*------------------------------------------------------------------
*	Table 5: Proportion Vaccinated by Baseline 
*	Willingness to Take Vaccines and Meeting Attendance
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

keep if treatment != 0 & in_baseline == 1
keep attended vaccinated_endline BSL_covid_wouldtake

keep if !missing(BSL_covid_wouldtake) & !missing(attended)

table ( BSL_covid_wouldtake ) ( attended ) (), command(r(mean): sum vaccinated_endline)
collect label dim attended "\textbf{Attended meeting}", modify
collect label levels attended 0 "No (1,066)" 1 "Yes (636)", modify
collect label levels BSL_covid_wouldtake 0 "No (279)" 1 "Yes (1,423)", modify
collect label dim BSL_covid_wouldtake "\textbf{Would take COVID-19 vaccine if offered}", modify
collect style cell, nformat(%9.3f)

collect export "${tables}/att_willing.tex", as(tex) tableonly replace

collect label dim attended "HH member attended meeting", modify
collect label dim covid_wouldtake "Would take COVID-19 vaccine if offered", modify
collect export "${tables}/att_willing.docx", as(docx) replace
		
*------------------------------------------------------------------
*	Table 6: Intent-To-Treat estimates for Knowledge 
* 	and Attitudes Towards Vaccines in Sub-sample
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear

keep if incomplete_observations == 0

est clear
local j = 1
foreach var of varlist END_covid_believe END_covid_know  ///
END_effect_stragree END_safe_stragree {
	
	tempfile tf`j'
	
	eststo: areg `var' any_treat, cluster(community_code) absorb(grpID)
	estadd local clusters = `e(N_clust)'
	
	parmest, label saving("`tf`j''", replace) idnum(`j')
	
	boottest any_treat, seed(420) reps(999)
	estadd local p_val `: display %7.3f `r(p)''
	
	sum `var' if treatment==0 & e(sample)
	estadd local ctlm `: display %7.3f `r(mean)''
	estadd local ctlsd = `: display %7.3f `r(sd)''
	
	local ++j
	
}

preserve 

clear
local j = `j' - 1
forvalues i = 1/`j' {
	
	local t = (`i' * 2) - 1
	
	append using `"`tf`i''"'
	
}

keep if parm == "any_treat"

local n_comparisons = `j'
tempvar original_order p_val_order
gen `original_order' = _n
sort p
gen `p_val_order' = _n
gen qval = .

local tempvars total_rejected1 total_rejected2 fdr_temp1 fdr_temp2 ///
		reject_temp1 reject_temp2 reject_rank1 reject_rank2

local qval = 1 
while `qval' > 0 {
	
	tempvar `tempvars'
	local vars_to_drop
	
	foreach v in `tempvars' {
		
		local vars_to_drop `vars_to_drop' ``v''
		
	}
	
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen `fdr_temp1' = `qval_adj'*`p_val_order'/`n_comparisons'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen `reject_temp1' = (`fdr_temp1'>=p) if p!=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen `reject_rank1' = `reject_temp1'*`p_val_order'
	* Record the rank of the largest p-value that meets above condition
	egen `total_rejected1' = max(`reject_rank1')

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`n_comparisons'/(`n_comparisons'-`total_rejected1'[1]))
	* Generate value q_2st*r/M
	gen `fdr_temp2' = `qval_2st'*`p_val_order'/`n_comparisons'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen `reject_temp2' = (`fdr_temp2'>=p) if p!=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen `reject_rank2' = `reject_temp2'*`p_val_order'
	* Record the rank of the largest p-value that meets above condition
	egen `total_rejected2' = max(`reject_rank2')

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace qval = `qval' if `p_val_order' <= `total_rejected2' & `p_val_order'!=.
	* Reduce q by 0.001 and repeat loop
	cap drop `vars_to_drop'
	local qval = `qval' - .001
	
}

sort `original_order'

forvalues i = 1/`j' {
	
	estimates restore est`i'
	local qval = qval[`i']
	estadd local qval `: display %7.3f `qval''
	
}

restore

esttab using "${tables}/itt_knowledge.tex", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("p_val Bootstrapped P-Value" "qval FDR Q-value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		label booktabs noobs nonotes collabels(none) compress replace ///
		mtitle("Believes COVID-19 is real" "Knows about the COVID-19 vaccine" ///
		"Believes vaccines are effective" "Believes vaccines are safe")
		
esttab using "${tables}/itt_knowledge.rtf", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("p_val Bootstrapped P-Value" "qval FDR Q-value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		label noobs nonotes collabels(none) compress replace ///
		mtitle("Believes COVID-19 is real" "Knows about the COVID-19 vaccine" ///
		"Believes vaccines are effective" "Believes vaccines are safe")
		
*------------------------------------------------------------------
*	Table 7: Intent-To-Treat Estimates for Source 
*	People Trust Most for Information on COVID-19 in Sub-sample
*------------------------------------------------------------------

est clear
local j = 1
foreach var of varlist END_trust_chc END_trust_mohs END_trust_media ///
END_trust_socmedia END_trust_famfriend {
	
	tempfile tf`j'
	
	eststo: areg `var' any_treat, cluster(community_code) absorb(grpID)
	estadd local clusters = `e(N_clust)'
	
	parmest, label saving(`"`tf`j''"', replace) idn(`j')
	
	boottest any_treat, seed(420) reps(999)
	estadd local p_val `: display %7.3f `r(p)''
	
	sum `var' if treatment==0 & e(sample)
	estadd local ctlm `: display %7.3f `r(mean)''
	estadd local ctlsd = `: display %7.3f `r(sd)''
	
	local ++j
	
}

preserve 

clear

local j = `j' - 1
forvalues i = 1/`j' {
	
	local t = (`i' * 2) - 1
	
	append using `"`tf`i''"'
	
}

keep if parm == "any_treat"

local n_comparisons = `j'
tempvar original_order p_val_order
gen `original_order' = _n
sort p
gen `p_val_order' = _n
gen qval = .

local tempvars total_rejected1 total_rejected2 fdr_temp1 fdr_temp2 ///
		reject_temp1 reject_temp2 reject_rank1 reject_rank2

local qval = 1 
while `qval' > 0 {
	
	tempvar `tempvars'
	local vars_to_drop
	
	foreach v in `tempvars' {
		
		local vars_to_drop `vars_to_drop' ``v''
		
	}
	
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen `fdr_temp1' = `qval_adj'*`p_val_order'/`n_comparisons'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen `reject_temp1' = (`fdr_temp1'>=p) if p!=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen `reject_rank1' = `reject_temp1'*`p_val_order'
	* Record the rank of the largest p-value that meets above condition
	egen `total_rejected1' = max(`reject_rank1')

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`n_comparisons'/(`n_comparisons'-`total_rejected1'[1]))
	* Generate value q_2st*r/M
	gen `fdr_temp2' = `qval_2st'*`p_val_order'/`n_comparisons'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen `reject_temp2' = (`fdr_temp2'>=p) if p!=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen `reject_rank2' = `reject_temp2'*`p_val_order'
	* Record the rank of the largest p-value that meets above condition
	egen `total_rejected2' = max(`reject_rank2')

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace qval = `qval' if `p_val_order' <= `total_rejected2' & `p_val_order'!=.
	* Reduce q by 0.001 and repeat loop
	cap drop `vars_to_drop'
	local qval = `qval' - .001
	
}

sort `original_order'

forvalues i = 1/`j' {
	
	estimates restore est`i'
	local qval = qval[`i']
	estadd local qval `: display %7.3f `qval''
	
}

restore

esttab using "${tables}/itt_trust.tex", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("p_val Bootstrapped P-Value" "qval FDR Q-value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		label booktabs noobs nonotes collabels(none) compress replace ///
		mtitle("Community Health Clinic" "Ministry of Health and Sanitation" ///
		"Media" "Social media" "Family and friends")
		
esttab using "${tables}/itt_trust.rtf", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("p_val Bootstrapped P-Value" "qval FDR Q-value" ///
		"ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" "r2 \$R^2\$") ///
		sfmt(%10.2f %10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		label noobs nonotes collabels(none) compress replace ///
		mtitle("Community Health Clinic" "Ministry of Health and Sanitation" ///
		"Media" "Social media" "Family and friends")
		
*------------------------------------------------------------------
*	Table 8: Intent-To-Treat Estimates for Demographic Sub-groups
*------------------------------------------------------------------
		
use "${data}/individual_level.dta", clear

keep if above18 == 1 & in_census == 1

estimates clear

local conditions "" /// 1 
	"if female == 1" /// 2
	"if female == 0" /// 3
	"if Age18_24 == 1" /// 4
	"if Age25_54 == 1" /// 5
	"if Age55 == 1" /// 6
	"if anyschooling == 1" /// 7
	"if anyschooling == 0" /// 8
	"if BSL_owns_land == 1" /// 9
	"if BSL_owns_land == 0" /// 10
	"if BSL_reduced_portions == 1" /// 11
	"if BSL_reduced_portions == 0" /// 12

foreach cond in "`conditions'" {
	
	preserve 
	
	if "`cond'" != "" {
		
		keep `cond'
		
	}
	
	eststo: areg vaccinated_endline any_treat ///
			, cluster(community_code) absorb(grpID)
			
	sum vaccinated_endline if treatment == 0
	local ctlm `: display %7.3f `r(mean)''
	local ctlsd = `: display %7.3f `r(sd)''
	
	estadd local clusters `e(N_clust)'
	estadd local ctlm `ctlm'
	estadd local ctlsd `ctlsd'
	
	restore
	
}

esttab using "${tables}/itt_demographic.tex", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" ///
		"r2 \$R^2\$") ///
		sfmt(%10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		alignment(C) ///
		mtitle("Full sample" "Female" "Male" "Aged 18-24" ///
		"Aged 25-54" "Aged 55+" "HH head any schooling" ///
		"HH head no schooling" "HH owns any land" ///
		"HH owns no land" "HH reduced food portions" ///
		"HH did not reduce food portions") ///
		label booktabs noobs nonotes collabels(none) compress replace
		
esttab using "${tables}/itt_demographic.rtf", ///
		b(3) se(3) ///
		keep(any_treat) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("ctlm Mean in Control" ///
		"N No. of Observations" "clusters No. of Villages" ///
		"r2 \$R^2\$") ///
		sfmt(%10.2f %10.0f %10.0f %10.2f) ///
		coef(any_treat "Pooled Treatment") ///
		alignment(C) ///
		mtitle("Full sample" "Female" "Male" "Aged 18-24" ///
		"Aged 25-54" "Aged 55+" "HH head any schooling" ///
		"HH head no schooling" "HH owns any land" ///
		"HH owns no land" "HH reduced food portions" ///
		"HH did not reduce food portions") ///
		label noobs nonotes collabels(none) compress replace

*------------------------------------------------------------------
*	Table 10: Comparison of Full Sample to Sub-Sample
*------------------------------------------------------------------

local census_15_vars perc_immun perc_literacy avg_age health_fivemiles_above ///
		perc_christian perc_muslim perc_born perc_emp_ag perc_acc_int ///
		formal_structure own_land prim_school_fivemiles ///
		source_water_fivemiles radio_ownership cell_ownership formal_roof ///
		all_assets
		
local vc_vars_1 villsize vaccinated_baseline_18 anyschooling ///
		BSL_owns_land BSL_reduced_portions age ///
		hh_gender breast preg

use "${data}/community_data.dta", clear

keep community_code treatment incomplete_observations ///
	grpID any_treat `census_15_vars' `vc_vars_1'
		
renvars `vc_vars_1', prefix("VC_")
label values treatment treat1
		
local vc_vars VC_villsize VC_vaccinated_baseline_18 VC_anyschooling ///
		VC_BSL_owns_land VC_BSL_reduced_portions VC_age ///
		VC_hh_gender VC_breast VC_preg
		
local balance_vars `census_15_vars' `vc_vars'

label variable VC_villsize "Village population"
label variable VC_vaccinated_baseline_18 "Proportion of adults vaccinated at baseline"
label variable VC_anyschooling "HH head has had any schooling"
label variable VC_BSL_owns_land "Owns land"
label variable VC_BSL_reduced_portions "Reduced portion sizes in last week"
label variable VC_age "Age"
label variable VC_hh_gender "HH head is female"
label variable VC_breast "Is breastfeeding"
label variable VC_preg "Is pregnant"
		
local balance_vars `census_15_vars' `vc_vars'

matrix sample_comp_table = J(53, 5, .)

local rownum = -1
foreach var of varlist `balance_vars' {
	
	local label_`var' : variable label `var'
	
	local rownum = `rownum' + 2
	local rownum_se = `rownum' + 1
	
	sum `var'
	local full_mean = `r(mean)'
	matrix sample_comp_table[`rownum', 1] = `r(N)'	
	matrix sample_comp_table[`rownum', 2] = `: display %7.3f `r(mean)''
	matrix sample_comp_table[`rownum_se', 2] = `: display %7.3f `r(sd)''
	
	sum `var' if incomplete_observations == 0
	local subsample_mean = `r(mean)'
	matrix sample_comp_table[`rownum', 3] = `r(N)'	
	matrix sample_comp_table[`rownum', 4] = `: display %7.3f `r(mean)''
	matrix sample_comp_table[`rownum_se', 4] = `: display %7.3f `r(sd)''
	
	matrix sample_comp_table[`rownum', 5] = `:display %7.3f `full_mean' - `subsample_mean''

	ttest `var', by(incomplete_observations)
	matrix sample_comp_table[`rownum_se', 5] = `: display %7.3f `r(p)''
	
}

/* 
Multinomial f-test as per reference: Özler, Berk, Lia C.H. Fernald, 
Patricia Kariger, Christin McConnell, Michelle Neuman, and Eduardo Fraga (2018).
"Combining pre-school teacher training with parenting education: A 
cluster-randomized controlled trial." Journal of Development Economics 
133, 448–467. 
*/

mlogit incomplete_observations `balance_vars', baseoutcome(0)
test[Incomplete]
matrix sample_comp_table[53, 2] = `r(p)'

clear
gen variable = ""

local rownum = 1
foreach var in `balance_vars' {
	
	insobs 2
	replace variable = "`label_`var''" in `rownum'
	
	local rownum = `rownum' + 2
	
}

svmat sample_comp_table, names(table)
rename table1 full_sample_n
rename table2 full_sample_mean
rename table3 subsample_n
rename table4 subsample_mean
rename table5 diff

replace variable = "\hspace{0.25cm} " + variable
replace variable = "Joint F-test p-value" in 53

tostring full_sample_mean subsample_mean diff, replace format(%9.3f) force
	
tostring full_sample_n subsample_n, replace format(%9.0f) force

foreach var of varlist full_sample_n full_sample_mean subsample_n ///
	subsample_mean diff {
	
	replace `var' = "" if `var' == "."
	replace `var' = "(" + `var' + ")" if mod(_n, 2) == 0 & !missing(`var')
	
}

label variable variable ""
label variable full_sample_n "\multicolumn{1}{c}{\shortstack{Full\\Sample\\N}}"
label variable full_sample_mean "\multicolumn{1}{c}{\shortstack{Full\\Sample\\Mean\\(SE)}}"
label variable subsample_n "\multicolumn{1}{c}{\shortstack{Restricted\\Sample\\N}}"
label variable subsample_mean "\multicolumn{1}{c}{\shortstack{Restricted\\Sample\\Mean\\(SE)}}"
label variable diff "\multicolumn{1}{c}{\shortstack{Difference\\(p-value)}}"

insobs 1, before (1)
replace variable = "\textbf{Community Characteristics from 2015 Census}" in 1
insobs 1, before(36)
replace variable = "\textbf{Characteristics from Village Census}" in 36

texsave using "${tables}/sample_diff.tex", ///
	title(Comparison of Full and Restricted Sample) ///
	align(lccccc) location(H) ///
	replace ///
	noendash ///
	hlines(35 54) nofix ///
	size(2) varlabels ///
	bold("Joint F-test p-value")
	
replace variable = subinstr(variable, "\hspace{0.25cm}", "", .)
replace variable = subinstr(variable, "\textbf{", "", .)
replace variable = subinstr(variable, "}", "", 1) if regexm(variable, "}$")
replace variable = strtrim(variable)
	
export excel using "${tables}/sample_diff.xlsx", replace firstrow(varlabels)

*===============================================================================
* 	Supplementary Information
*===============================================================================

*------------------------------------------------------------------
*	1.1 Consort Diagram Level 1
*------------------------------------------------------------------

// 150 communities
use "${data}/community_data.dta", clear
unique community_code

// 50 control villages
count if treatment == 0

// 50 "door-to-door" villages
count if treatment == 1

// 50 "small group" villages
count if treatment == 2

*------------------------------------------------------------------
*	Consort Diagram Level 2
*------------------------------------------------------------------

// control census population
use "${data}/individual_level.dta", clear
count if treatment == 0 & in_census == 1

// control baseline sample
unique hh_id if in_baseline == 1 & treatment == 0
unique hh_id if incomplete_observations == 0 & in_baseline == 1 & treatment == 0

// dtd census population 
count if treatment == 1 & in_census == 1

// dtd baseline sample
unique hh_id if in_baseline == 1 & treatment == 1
unique hh_id if incomplete_observations == 0 & in_baseline == 1 & treatment == 1

// group census population 
count if treatment == 2 & in_census == 1

// group baseline sample
unique hh_id if in_baseline == 1 & treatment == 2
unique hh_id if incomplete_observations == 0 & in_baseline == 1 & treatment == 2

*------------------------------------------------------------------
*	Consort Diagram Level 3
*------------------------------------------------------------------

// dtd intervention 
count if treatment == 1 & vaccinated_team == 1

// dtd attrited
count if treatment == 1 & in_baseline == 1 & in_endline == 0

// dtd new
count if treatment == 1 & in_baseline == 0 & in_endline == 1

// group intervention 
count if treatment == 2 & vaccinated_team == 1

// group attrited
count if treatment == 2 & in_baseline == 1 & in_endline == 0

// group new
count if treatment == 2 & in_baseline == 0 & in_endline == 1

*------------------------------------------------------------------
*	Consort Diagram Level 4
*------------------------------------------------------------------

// dtd endline sample
unique hh_id if treatment == 1 & in_endline == 1
unique hh_id if treatment == 1 & incomplete_observations == 0 & in_endline == 1

// group endline sample
unique hh_id if treatment == 2 & in_endline == 1
unique hh_id if treatment == 2 & incomplete_observations == 0 & in_endline == 1
	
*------------------------------------------------------------------
*	1.2 Variation in Endline Vaccination Rate
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear
		
keep if above18 == 1 & in_census == 1

collapse (mean) vaccinated_endline ///
		(max) treatment, by(community_code)

histogram vaccinated_endline if inrange(treatment, 1, 2), ///
	freq color(gray%60) width(0.05) ylabel(0(5)15, nogrid) ///
	xlabel(0(0.05)0.75) xtitle(Share of People Vaccinated) ///
	plotregion(fcolor(white) lcolor(black))
	
graph export "${graphs}/comm_vax_rate_dist.svg", replace as(svg)
graph export "${graphs}/comm_vax_rate_dist.png", replace as(png)
			
*------------------------------------------------------------------
*	1.3 Variation in Number of People Vaccinated in Each Community
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear
		
collapse (sum) vaccinated_team (max) treatment, by(community_code)

histogram vaccinated_team if inrange(treatment, 1, 2), ///
	freq width(10) xlabel(0(10)150) ///
	xtitle("Number of Vaccines Administered in Community") ///
	plotregion(fcolor(white) lcolor(black)) ylabel(,nogrid) ///
	bgcolor(white) graphregion(fcolor(white)) color(gray%60)
	
graph export "${graphs}/comm_vax_count_dist.svg", replace as(svg)
graph export "${graphs}/comm_vax_count_dist.png", replace as(png)
	
*------------------------------------------------------------------
*	1.4 Variation in Number of Vaccines Administered per
*	Community by Vaccination Team
*------------------------------------------------------------------

use "${data}/individual_level.dta", clear
		
collapse (sum) vaccinated_team, by(socialmob community_code)

egen median = median(vaccinated_team), by(socialmob)
egen loq = pctile(vaccinated_team), by(socialmob) p(25)
egen upq = pctile(vaccinated_team) , by(socialmob) p(75)
egen min = min(vaccinated_team)
replace min = min + 3
egen n = count(vaccinated_team), by(socialmob) 
bysort socialmob (vaccinated_team): gen use=_n
gen shown = "{it:n} = " + string(n) 
gen socialmob2 = socialmob + 0.03
gen socialmob3 = socialmob - 0.03
		
stripplot vaccinated_team, ms(none) over(socialmob) ///
	box(barw(0.2) bfcolor("maroon") lcolor(black)) ///
	whiskers(lcolor(black) lwidth(med) msize(small)) ///
	vertical ///
	addplot(scatter median loq upq socialmob2 if use==1, ms(none ..) ///
	mla(median loq upq) mlabsize(*1.2 ..) || ///
	scatter min socialmob if use==1, ms(none) mla(shown) mlabcolor(black) ///
	mlabsize(*1.2) mlabpos(6)) ///
	xsc(r(. 5.25)) ysc(r(-.5 3)) xla(, noticks nolabel) ///
	ytitle("Number of Vaccines Administered in Community", size(medsmall)) ///
	xtitle("Vaccination Team", size(medium)) ///
	plotregion(fcolor(white) lcolor(black)) ///
	bgcolor(white) graphregion(fcolor(white)) ///
	pctile(0)
		
graph export "${graphs}/vac_team.svg", replace as(svg)
graph export "${graphs}/vac_team.png", replace as(png)
		
*------------------------------------------------------------------
*	2. Systematic Literature Review
*------------------------------------------------------------------

use "${data}/lit_review.dta", clear

drop if author1 == "THIS STUDY"

keep publicationyear effectsize country author1 vaccine classification
decode classification, gen(clas_str)
drop classification
order clas_str country vaccine author1 publicationyear effectsize
sort clas_str publicationyear author1
label variable clas_str "Intervention Type"

foreach var of varlist _all {
	
	local label : variable label `var'
	local label = strtrim(subinstr("`label'", "\hspace{0.25cm}", "", .))
	label variable `var' "`label'"
	
}

replace vaccine = subinstr(vaccine, "≥", "$\geq$", .)

// Make a series of tables 15 rows long so that they all roughly fit on 1 page
local i = 1
local t = 1
while `i' < 234 {
	
	texsave using "${tables}/effect_size_table_`t'" ///
		if _n >= `i' & _n < `i' + 15 ///
		, varlabels replace landscape rowsep(2mm)
	
	local ++t
	local i = `i' + 15
	
}

export excel using "${tables}/lit_review_table.xlsx", replace firstrow(varlabels)
